*Get full sample with fields*


*Load the starting sample, replacing whatever data are currently in memory
use "Data\start_sample.dta", clear

*Keep only the variables that the rest of this script uses
keep pnr aar age koen fined ///
    final_educ final_educ_level_4 final_field_num final_field ///
    wage_start_mean_ambition wage_growth_ambition ///
    wage_start_mean_ambition_s wage_growth_ambition_s ///
    ambition_type_k_4_s

**Merge on variables**

*Goldin
*Bring in the two Goldin quotient variables, matching rows on pnr and aar
merge 1:1 pnr aar using "Data\Goldin_quotients_for_all.dta", ///
    keepusing(goldin_quotient goldin_quotient_s)

*Only rows found in both datasets survive; afterwards the merge marker is removed
drop if _merge!=3
drop _merge

*CEO probability
*Add ever_top_manager from the manager data; rows that exist only in the using file are discarded
merge 1:1 pnr aar using "Data\manager_training.dta", keepusing(ever_top_manager)
keep if _merge!=2
drop _merge

*Replace the row-level value by its maximum within pnr, so every row of a pnr carries the same value
*(pnr with no nonmissing value in any row end up missing)
bysort pnr: egen top_manager_max=max(ever_top_manager)
drop ever_top_manager
rename top_manager_max ever_top_manager

*Age at first child
*Add first_kid_year; rows that exist only in the using file are discarded
merge 1:1 pnr aar using "Data\first_child.dta", keepusing(first_kid_year)
keep if _merge!=2
drop _merge

*birth_age holds age in the row where aar equals first_kid_year and is missing everywhere else;
*the within-pnr minimum then copies that age to all rows of the pnr
gen birth_age=age if aar==first_kid_year & first_kid_year!=.
bysort pnr (aar): egen age_at_first_kid=min(birth_age)
drop birth_age

*Same construction, but only rows with koen=="2" contribute
*NOTE(review): the string comparison requires koen to be a string variable - confirm
gen birth_age=age if aar==first_kid_year & first_kid_year!=. & koen=="2"
bysort pnr (aar): egen age_at_first_kid_women=min(birth_age)
drop birth_age first_kid_year

*Life wages
*Add p_life_wage, matching on pnr and aar
merge 1:1 pnr aar using "Data\life_wage_ambition_final.dta", keepusing(p_life_wage)

*Discard rows that exist only in the using file, then remove the merge marker
keep if _merge!=2
drop _merge

*Life earnings
*Add p_life_earnings and earnings_deflated, matching on pnr and aar
merge 1:1 pnr aar using "Data\life_earnings_ambition_final.dta", ///
    keepusing(p_life_earnings earnings_deflated)

*Discard rows that exist only in the using file, then remove the merge marker
keep if _merge!=2
drop _merge

*Exponentiate earnings_deflated and add it up within pnr
*NOTE(review): the exp() suggests earnings_deflated is stored in logs - confirm
gen earn_level=exp(earnings_deflated)
bysort pnr: egen earn_total=total(earn_level)
*total() treats missing values as zero, so a pnr without any earnings gets 0; those are left missing here
gen life_time_earnings=earn_total if earn_total>0
drop earn_level earn_total

*Personal wealth
*Add wealth_deflated, matching on pnr and aar
merge 1:1 pnr aar using "Data\wealth_ambition_final.dta", keepusing(wealth_deflated)

*Discard rows that exist only in the using file, then remove the merge marker
keep if _merge!=2
drop _merge

*Helper that carries wealth only in rows where age is 50
gen wealth_if_50=wealth_deflated if age==50

*Within-pnr mean over all rows, and within-pnr mean over the age-50 rows only
bysort pnr: egen avr_wealth=mean(wealth_deflated)
bysort pnr: egen wealth_50=mean(wealth_if_50)
drop wealth_if_50

*Labor supply variables from RAS
*No keepusing() here, so every variable in the using file is brought in
merge 1:1 pnr aar using "Data\Core_datasets\ras_ftpt.dta"

*Discard rows that exist only in the using file, then remove the merge marker
keep if _merge!=2
drop _merge

*participation: 1 when ftpt_ras is 0.5 or 1, 0 when ftpt_ras is 0, missing for any other value
*NOTE(review): presumably 0.5 codes part time and 1 full time - confirm against the RAS data
gen participation=1 if inlist(ftpt_ras,0.5,1)
replace participation=0 if ftpt_ras==0

*ft_if_participation: defined only for rows with participation equal to 1; 1 when ftpt_ras is 1, otherwise 0
gen ft_if_participation=(ftpt_ras==1) if participation==1

*Within-pnr means of the two indicators (missing rows are ignored by mean())
bysort pnr (aar): egen avr_participation=mean(participation)
by pnr: egen avr_ft_if_par=mean(ft_if_participation)

*Save
save "Data\before_reduction.dta", replace


**Reduce to program level**
*First, remove the panel dimension: keep exactly one row per individual (pnr),
*namely the row with the latest aar.
*Selecting on max(age)==age did not guarantee a single row: when age is missing in
*every row of a pnr the comparison missing==missing is true and all rows survive,
*and a repeated maximum age also keeps several rows. Extra rows would over-weight
*that person in the program means and inflate educ_count computed further down.
*pnr and aar identify rows uniquely (they are the 1:1 merge keys above), so the
*last row within pnr after sorting on aar is well defined.
bysort pnr (aar): keep if _n==_N

*Second, reduce the individual dimension to obtain averages per program (final_educ)
*Rows with final_educ equal to system missing or to 1 are excluded
keep if final_educ!=. & final_educ!=1
sort final_educ

*Overwrite each individual-level outcome with its mean within final_educ
*(the original values are parked in a temporary variable and dropped afterwards)
foreach outcome in ever_top_manager age_at_first_kid age_at_first_kid_women ///
        life_time_earnings avr_wealth wealth_50 avr_participation avr_ft_if_par {
    tempvar individual_value
    rename `outcome' `individual_value'
    by final_educ: egen `outcome'=mean(`individual_value')
    drop `individual_value'
}

*Number of rows in each final_educ group
by final_educ: egen educ_count=count(final_educ)

*One row per final_educ, taking the value found in the first row of each group.
*The means and educ_count created above are constant within final_educ, so the first row is representative.
*NOTE(review): for the other variables (first) returns the first row's value even when it is missing,
*and the row order within final_educ is not fixed by the sort above - confirm that these variables are
*constant within final_educ, or consider firstnm
collapse (first) fined final_educ_level_4 final_field_num final_field ///
    wage_start_mean_ambition wage_growth_ambition ///
    wage_start_mean_ambition_s wage_growth_ambition_s ambition_type_k_4_s ///
    goldin_quotient goldin_quotient_s p_life_wage p_life_earnings ///
    ever_top_manager age_at_first_kid age_at_first_kid_women ///
    life_time_earnings avr_wealth wealth_50 ///
    avr_participation avr_ft_if_par educ_count, by(final_educ)


*Save regression sample
save "Data\regression_sample.dta", replace
